# Mean and standard deviation of a small population ----

# Load the helper script; pop_sd(), used throughout, is expected to be
# defined there.
source("../pop_sd.R")

# The population we will experiment with.
pop_main <- c(
  33, 27, 16, 18, 14, 19, 26, 7, 28, 30,
  13, 20, 17, 21, 21, 27, 16, 21, 39, 7
)

# First impression: a histogram with the default break points.
hist(pop_main)

# Keep in mind that a histogram, especially one for a small data set,
# can change its shape when the break points change.
hist(pop_main, breaks = seq(from = 3, to = 45, by = 7))
# or
hist(
  pop_main,
  breaks = seq(from = 5, to = 41, by = 4),
  main = "doctored break points"
)

# The mean and the standard deviation, however, do not depend on
# the choice of break points.
mean(pop_main)
pop_sd(pop_main)

# Adding a constant ----
# Add the same value to every item to get a new population, then
# compare its mean and standard deviation with the originals.
pop_add_15 <- pop_main + 15
pop_add_15
mean(pop_add_15)
pop_sd(pop_add_15)
# The mean went up by 15 but the standard deviation did not change.

# Multiplying by a constant ----
# Multiply every item by the same value and compare again.
pop_mult_3 <- pop_main * 3
pop_mult_3
mean(pop_mult_3)
pop_sd(pop_mult_3)
# This time both the mean and the standard deviation changed.
# Standardizing: move the mean to 0, then the standard deviation to 1 ----

# Step 1: subtract the mean from every item so the new mean is 0.
pop_mean_0 <- pop_main - mean(pop_main)
pop_mean_0
mean(pop_mean_0)

# Step 2: divide by the standard deviation of the centered values so the
# new standard deviation is 1.
pop_mean_0_sd <- pop_sd(pop_mean_0)
pop_mean_0_sd
pop_m0_sd1 <- pop_mean_0 / pop_mean_0_sd
pop_m0_sd1
mean(pop_m0_sd1)
pop_sd(pop_m0_sd1)

# Just to remind us that the values displayed are rounded: show them with
# the maximum number of digits, then put the setting back. options()
# returns the previous values, so restoring from that list leaves the
# session as we found it instead of forcing digits back to a fixed 7.
old_opts <- options(digits = 22)
pop_m0_sd1
options(old_opts)

# Go back and look at a histogram of this new population.
hist(pop_m0_sd1)

# Building a population with a chosen mean and standard deviation ----
# Now generate a new population, this time with mean = 14.5 and
# standard deviation = 3.7.

# First set the standard deviation: scaling the sd-1 population by 3.7
# changes the spread while the mean stays at 0.
pop_sd3p7 <- pop_m0_sd1 * 3.7
mean(pop_sd3p7)
pop_sd(pop_sd3p7)

# Then change the mean to be 14.5: adding a constant moves the mean
# without touching the standard deviation.
pop_m14p5_sd3p7 <- pop_sd3p7 + 14.5
mean(pop_m14p5_sd3p7)
pop_sd(pop_m14p5_sd3p7)

# Look at the histogram of the new population, first with default breaks
# and then with hand-picked ones (steps of 1.85, i.e. half of 3.7).
hist(pop_m14p5_sd3p7)
hist(
  pop_m14p5_sd3p7,
  breaks = seq(from = 7.1, to = 23.75, by = 1.85),
  main = "doctored again"
)

##########
# We will move on to a much larger population.
# Here is a population of IQ scores.
# Load the generator script and build the population. The code below
# reads the result from the variable L1, which gnrnd5() is expected
# to create.
source("../gnrnd5.R")
gnrnd5(43073899904, 15000100)

# Peek at the first and the last 15 values.
head(L1, 15)
tail(L1, 15)

# A summary of the population ...
summary(L1)
# ... and a histogram with default break points.
hist(L1, main = "IQ values")

# Mean and standard deviation of the population.
pop_mean <- mean(L1)
pop_mean
pop_stndev <- pop_sd(L1)
pop_stndev

# Redo the histogram, using what we learned from the mean and the
# standard deviation to choose the break points.
hist(
  L1,
  main = "IQ values, using special breaks",
  breaks = seq(from = 40, to = 160, by = 7.5),
  right = FALSE
)

# A box and whisker plot of the same values.
boxplot(L1, horizontal = TRUE, main = "IQ values")

# Mark off the box plot with standard deviation lines: the mean and
# 1, 2 and 3 standard deviations on either side of it.
std_markers <- pop_mean + (-3:3) * pop_stndev
std_markers
abline(v = std_markers, lty = "dashed", col = "red")

# Now find the 4th, 16th, 50th, 84th, and 96th percentiles, then compare
# them with the mean and standard deviation markers drawn above.
q_marks <- quantile(L1, probs = c(0.04, 0.16, 0.50, 0.84, 0.96))
q_marks
abline(v = q_marks, lty = "dotted", col = "blue")

# Convert L1 to L2, where L2 has mean 0 and standard deviation 1.
L2 <- (L1 - pop_mean) / pop_stndev
mean(L2)
pop_sd(L2)
hist(L2, right = FALSE)
boxplot(L2, horizontal = TRUE)